###############################################                                     
# Cause of Effect? Turnout in Hispanic Majority-Minority Districts
#  - John A. Henderson, Jasjeet S. Sekhon, and Rocio Titiunik
#  - Forthcoming in Political Analysis
#  - Replication file for <Figure 3>
#  - April 14, 2016
###############################################
            
options(width=150)
rm(list=ls())   

path='/local/'  
source(paste(path,'replicationPA/funs/headers.R',sep=''))
  
# LOAD BASELINE AND MATCHED DATA

#~/Dropbox/ethnicity/replication-files/replication-Baseline-Matched-datasets-08-16-2012-version/' 
set.seed(57459)

#WH2data <- read.csv(file = paste(path,"Baseline-afterMATCH.csv",sep=''), header = TRUE, stringsAsFactors = FALSE)
#WH2data <- read.dta(file = paste(path,"replicationPA/data/Baseline-afterMATCH-recreated.dta",sep=''))          
load(file = paste(path,"replicationPA/data/Baseline-afterMATCH-recreated.Rdata",sep=''))          
dim(WH2data)   

WH2data=WH2data[which(WH2data$dfound_pair_newdata==1),]  
dim(WH2data)
      
#Sdata <-  read.dta(file = paste(path,"replicationPA/data/Matched-recreated.dta",sep=''))
load(file = paste(path,"replicationPA/data/Matched-recreated.Rdata",sep=''))   
dim(Sdata)
      
Sdata=Sdata[which(Sdata$dfound_pair_newdata==1),]
dim(Sdata)

WH2data$hvap=WH2data$pop_hispanic18 
WH2data$nhvap=WH2data$vap-WH2data$hvap

WH2data$phh_income99_39less  = WH2data$phh_income99_0to19    + WH2data$phh_income99_20to39
WH2data$phh_income99_40to74  = WH2data$phh_income99_40to59   + WH2data$phh_income99_60to74
WH2data$phh_income99_100plus = WH2data$phh_income99_100to199 + WH2data$phh_income99_200
WH2data$ppop_foreign         = WH2data$ppop_foreign_naturcit + WH2data$ppop_foreign_nocit

WH2data$y98_reg_nhisp		 = WH2data$y98_reg_tot - WH2data$y98_reg_hisp 
WH2data$y98_preg_nhisp		 = WH2data$y98_reg_nhisp/WH2data$y98_reg_tot
       
WH2data$y00_turn_nhisp       = WH2data$y00_turn_tot - WH2data$y00_turn_hisp
WH2data$y00_reg_nhisp        = WH2data$y00_reg_tot - WH2data$y00_reg_hisp

WH2data$y02_turn_nhisp       = WH2data$y02_turn_tot - WH2data$y02_turn_hisp
WH2data$y02_reg_nhisp        = WH2data$y02_reg_tot  - WH2data$y02_reg_hisp                   

WH2data$y04_turn_nhisp       = WH2data$y04_turn_tot - WH2data$y04_turn_hisp
WH2data$y04_reg_nhisp        = WH2data$y04_reg_tot  - WH2data$y04_reg_hisp                                                                           
                                                                          
WH2data$y06_turn_nhisp       = WH2data$y06_turn_tot - WH2data$y06_turn_hisp
WH2data$y06_reg_nhisp        = WH2data$y06_reg_tot  - WH2data$y06_reg_hisp

Sdata$hvap=Sdata$pop_hispanic18 
Sdata$nhvap=Sdata$vap-Sdata$hvap  

Sdata$phh_income99_39less  = Sdata$phh_income99_0to19    + Sdata$phh_income99_20to39
Sdata$phh_income99_40to74  = Sdata$phh_income99_40to59   + Sdata$phh_income99_60to74
Sdata$phh_income99_100plus = Sdata$phh_income99_100to199 + Sdata$phh_income99_200
Sdata$ppop_foreign         = Sdata$ppop_foreign_naturcit + Sdata$ppop_foreign_nocit

Sdata$y98_reg_nhisp			 = Sdata$y98_reg_tot - Sdata$y98_reg_hisp
Sdata$y98_preg_nhisp		 = Sdata$y98_reg_nhisp/Sdata$y98_reg_tot

Sdata$y00_turn_nhisp       = Sdata$y00_turn_tot - Sdata$y00_turn_hisp
Sdata$y00_reg_nhisp        = Sdata$y00_reg_tot - Sdata$y00_reg_hisp

Sdata$y02_turn_nhisp       = Sdata$y02_turn_tot - Sdata$y02_turn_hisp
Sdata$y02_reg_nhisp        = Sdata$y02_reg_tot  - Sdata$y02_reg_hisp                   

Sdata$y04_turn_nhisp       = Sdata$y04_turn_tot - Sdata$y04_turn_hisp
Sdata$y04_reg_nhisp        = Sdata$y04_reg_tot  - Sdata$y04_reg_hisp                                                                           
                                                                          
Sdata$y06_turn_nhisp       = Sdata$y06_turn_tot - Sdata$y06_turn_hisp
Sdata$y06_reg_nhisp        = Sdata$y06_reg_tot  - Sdata$y06_reg_hisp         

names(Sdata)[which(names(Sdata)=='tr')]='Tr'
names(WH2data)[which(names(WH2data)=='tr')]='Tr'

  
# CONSTRUCT NHISP 2000 OUTCOME FOR WH2DATA    

#WH2data$y04_reg_nhisp-WH2data$y04_reg_tot-WH2data$y04_reg_hisp
#WH2data$y04_turn_nhisp=WH2data$y04_turn_tot-WH2data$y04_turn_hisp
  
# CONSTRUCT NHISP 2000 OUTCOME FOR SDATA

#Sdata$y04_reg_nhisp=Sdata$y04_reg_tot-Sdata$y04_reg_hisp
#Sdata$y04_turn_nhisp=Sdata$y04_turn_tot-Sdata$y04_turn_hisp


# Conditioning/Balance set: group most important variables first
# HERE
imp.vars <- data.frame(
Sdata$vap,
Sdata$ppop_black18,
Sdata$ppop_hispanic18,
Sdata$phh_income99_39less,
Sdata$phh_income99_40to74,
Sdata$phh_income99_100plus,                   
Sdata$ppop_25_hsless,
Sdata$ppop_foreign

)
# Registration variables: include only 1998
reg.vars <- data.frame(
Sdata$y98_preg_tot,                                              
Sdata$y98_preg_hisp, # no turnout info for 1998, so this is the closest we have to previous outcome
Sdata$y98_preg_dem,
Sdata$y98_preg_rep                       
)
dim(reg.vars)
cat("Final dimension of reg.vars: ", dim(reg.vars), "\n")

# Vote variables

vote.vars <- data.frame(
Sdata$y98_pvote_ussdem,        # statewide and local offices
Sdata$y98_pvote_govdem,
Sdata$y98_pvote_cngdem,                        
Sdata$y98_pvote_assdem,
Sdata$y98_pvote_atgdem
)

dim(vote.vars)
cat("Final dimension of vote.vars: ", dim(vote.vars), "\n")

# Population variables
pop.vars <- data.frame(
Sdata$ppop_fem,
Sdata$ppop_25to44,
Sdata$ppop_45to59,
Sdata$ppop_70older
)
cat("Final dimension of pop.vars: ", dim(pop.vars), "\n")


Xall <- data.frame (imp.vars,reg.vars, vote.vars, pop.vars)  # exclude Sdata$DisTri_1991 since we will keep one unique triplet in every file
X <- data.frame (imp.vars)  # exclude Sdata$DisTri_1991 since we will keep one unique triplet in every file
B <- X
dim(X)

    
# WH2data treatment 
Tr <- WH2data$Tr          
m1=c()
m1$index.treated=which(Tr==T)
m1$index.control=which(Tr==F)           
print(table(Tr)) 
    
# Sdata treatment 
Tr <- Sdata$Tr
m2=c()
m2$index.treated=which(Tr==T)
m2$index.control=which(Tr==F)           
print(table(Tr))
                          
######################################################################
#     
#     Dataset 2: Sdata
#     
######################################################################

#####################
#  QQ Plots 2000
#####################

# matched   
      
Y  <- Sdata$y00_turn_hisp/Sdata$y00_reg_hisp
Tr <- Sdata$Tr

Y[which( Sdata$y00_turn_hisp==999999 | Sdata$y00_reg_hisp==0)] <- NA

print("Mean Turnout Dif: QQ 2000")
print("   Treated   ")
mean(Y[m2$index.treated],na.rm=T)
print("   Control   ")
mean(Y[m2$index.control],na.rm=T)

weighted.mean(Y[m2$index.treated],na.rm=T,w=(WH2data$y00_reg_hisp/WH2data$y00_reg_tot)[m2$index.treated])                                       
weighted.mean(Y[m2$index.control],na.rm=T,w=(WH2data$y00_reg_hisp/WH2data$y00_reg_tot)[m2$index.control])
                                            
pdf(file = paste(path,"replicationPA/Figure3/matched_2000-house-HVAP_REG_Final.pdf",sep=''))   
	qqplot(x=Y[m2$index.treated], y=Y[m2$index.control],
	xlab=paste("Blocks to be moved from Non-Hispanic White incumbent to Hispanic incumbent",sep=""),
	ylab=paste("Blocks with same Non-Hispanic White incumbent before and after",sep=""),
	xlim=c(0,1), ylim=c(0,1),
	)
	abline(a=0,b=1,col="red",lty=1)
	n1=mean(Y[m1$index.treated],na.rm=T)
	n0=mean(Y[m1$index.control],na.rm=T)   
	legend(cex=1.25,x=c(0,0),y=c(.9,.7),pch=c('-'),
	legend=c(paste('Mean Treated: ' ,round(n1,digits=2) ,sep=''),
			 paste('Mean Control: ' ,round(n0,digits=2) ,sep='')),box.col='white')   
	rect(xleft=0,xright=.425,ytop=.9,ybottom=.745)   
dev.off()

# End